*************************************************
* Prepare Firm-Level Data
*************************************************
* Move to the data directory and load the raw firm-level panel.
cd "${data_dir}"
use "firmdata.dta", clear


* Give the raw register variables short working names
* (old names on the first line, new names on the second):
rename (Start_dato Ophoer_dato GF_OMS GF_BAV GF_AARSV GF_VIRKFKOD) ///
       (birth      death       sales  va     empl2    ownership)

* Firm age in years; the start year itself counts as age 1.
* NOTE(review): year() assumes birth is stored as a Stata daily date -- confirm.
gen startyear = year(birth)
gen firmage = (aar + 1) - startyear


* Define industry variables based on DB93 and DB03:
* (firms must be in chronological order for the [_n-1] lookups below)
sort cvrnr aar

* gr009 and gr027 remain unchanged after the small update in 2003
* No force on the DB93 code: if it contains non-numeric characters, destring
* creates no variable and the following replace stops the run.
destring GF_gr009_DB93, gen(sector1)
replace sector1 = 0 if sector1 == .
* Diagnostic listing of the raw DB03 codes before they are force-converted
tab GF_gr009_DB03
* force: non-numeric DB03 codes become missing and are then recoded to 0
destring GF_gr009_DB03, gen(sector2) force
replace sector2 = 0 if sector2 == .
* 0 marks "no code". Take the DB03 code when present, otherwise the DB93 code.
* This equals sector1 + sector2 whenever at most one of them is non-zero, but
* does not add the two codes together if both happen to be filled in.
gen sector009 = cond(sector2 != 0, sector2, sector1)
* DB03 should be available until 2008, but in case they are not,
* keep 2007 industry stable in 2008
* (only when the previous observation of the firm is the preceding year)
by cvrnr (aar): replace sector009 = sector009[_n-1] if aar == 2008 & (sector009 == 0 | sector009 == .) & aar == aar[_n-1] + 1
drop sector1 sector2 GF_gr009_DB93 GF_gr009_DB03

* gr027: numeric sector code from the first two characters of the DB93 and
* DB03 strings.
gen sector1 = substr(GF_gr027_DB93,1,2)
* No force here: if non-numeric characters are present the variable stays a
* string and the following replace stops the run.
destring sector1, replace
replace sector1 = 0 if sector1 == .
gen sector2 = substr(GF_gr027_DB03,1,2)
* force: non-numeric DB03 codes become missing and are then recoded to 0
destring sector2, replace force
replace sector2 = 0 if sector2 == .
* 0 marks "no code". Take the DB03 code when present, otherwise the DB93 code.
* This equals sector1 + sector2 whenever at most one of them is non-zero, but
* does not add the two codes together if both happen to be filled in.
gen sector027 = cond(sector2 != 0, sector2, sector1)
* Carry the previous year's code into 2008 when 2008 has none and the previous
* observation of the firm is the preceding year. bysort makes the required
* firm/year ordering explicit rather than relying on an earlier sort.
bysort cvrnr (aar): replace sector027 = sector027[_n-1] if aar == 2008 & (sector027 == 0 | sector027 == .) & aar == aar[_n-1] + 1
drop sector1 sector2 GF_gr027_DB93 GF_gr027_DB03

* gr053 also has very few changes in 2003

* this variable is only used as a control in robustness regressions
* and will have an implicit interaction with time in case of updates

* The numeric code is built from characters 1-2 and 4-5 of the raw string
* (character 3 is skipped).
gen sector1 = substr(GF_gr053_DB93,1,2) + substr(GF_gr053_DB93,4,2)
* force: strings with non-numeric characters become missing, then 0
destring sector1, replace force
replace sector1 = 0 if sector1 == .
gen sector2 = substr(GF_gr053_DB03,1,2) + substr(GF_gr053_DB03,4,2)
destring sector2, replace force
replace sector2 = 0 if sector2 == .
* 0 marks "no code". Take the DB03 code when present, otherwise the DB93 code.
* This equals sector1 + sector2 whenever at most one of them is non-zero, but
* does not add the two codes together if both happen to be filled in.
gen sector053 = cond(sector2 != 0, sector2, sector1)
* Carry the previous year's code into 2008 when 2008 has none and the previous
* observation of the firm is the preceding year. bysort makes the required
* firm/year ordering explicit rather than relying on an earlier sort.
bysort cvrnr (aar): replace sector053 = sector053[_n-1] if aar == 2008 & (sector053 == 0 | sector053 == .) & aar == aar[_n-1] + 1
drop sector1 sector2 GF_gr053_DB93 GF_gr053_DB03

* Retain only the panel identifiers, firm dates/age, ownership, the size
* measures and the three sector codes; everything else is discarded.
keep cvrnr aar                        ///
     birth death firmage ownership    ///
     sales va empl2                   ///
     sector009 sector027 sector053

* Store the panel ordered by year, then by firm identifier.
sort aar cvrnr

save "GF_firmdata.dta", replace

